# -*- coding: utf-8 -*-
'''
Created on Apr 3, 2013

@author: LONG HOANG GIANG
'''

import pyLib
import gzip as _gzip
import base64
import os, json


# Root folder of the source material: process() appends the list file names
# ("listitem*.xml") and the data folder names ("data*") to this prefix.
PATH = "/longhoanggiang/daudm/"

# Root folder of the generated output: process() writes the index to
# OUTPUT + "data" and getDetail() writes one file per story under
# OUTPUT + "files/".
OUTPUT = "/longhoanggiang/truyentext/truyencotich/"

# Runs at import time. Presumably creates the "files/" output folder when it
# is missing -- confirm against pyLib.createIfNotExistsPath.
pyLib.createIfNotExistsPath(OUTPUT + "files/")

# NOTE(review): not read by any function visible in this file; getDetail()
# and process() each bind their own local variable named `data`.
data = []

def getDetail(path, fileId):
    '''
    Decode one gzipped, base64-encoded story file and store its body as JSON.

    The file at `path` is gunzipped, every occurrence of the marker string
    'hindua88' is removed, and the remainder is base64-decoded. The decoded
    markup is parsed with pyLib.buildTree, the nodes matching "//body/*" are
    serialised with pyLib.Etree.tostring, and the result is printed, then
    JSON-encoded and written through pyLib.gzip to OUTPUT + 'files/' + fileId.

    @param path: location of the gzipped source file
    @param fileId: name of the output file inside OUTPUT + 'files/'; it is
        concatenated to a string, so it is presumably a string itself
    '''
    fp = _gzip.open(path, 'rb')
    try:
        raw = fp.read()
    finally:
        # Release the handle even when reading fails (e.g. a corrupt archive).
        fp.close()
    encoded = raw.replace('hindua88', '')
    # b64decode replaces base64.decodestring, a deprecated alias that was
    # removed in Python 3.9; both go through the same underlying decoder.
    markup = base64.b64decode(encoded)
    tree = pyLib.buildTree(markup)
    body = pyLib.Etree.tostring(tree.xpath("//body/*"))
    print(body)
    pyLib.gzip(OUTPUT + 'files/' + fileId, json.dumps(body))
    
def process():    
    agiang = []
    for idx in range(1, 7):
        filename = "listitem.xml" if idx == 1 else "listitem{0}.xml".format(idx)
        folder = "data" if idx == 1 else "data{0}".format(idx)
        data = pyLib.file_get_content(PATH + filename)
        tree = pyLib.buildTree(data, pyLib.XML_PARSER)
        for item in tree.xpath("//item"):
            id = pyLib.stringify(item.xpath("./id"))
            name = pyLib.stringify(item.xpath("./name"))
            path = pyLib.stringify(item.xpath("./folder"))
            print id, name, path
            fileId = pyLib.crc32unsigned("{0}-{1}".format(name, path))
            getDetail(PATH + folder + "/" + path, fileId)
            agiang.append({'title': name, 'id': fileId})
    pyLib.gzip(OUTPUT + "data", json.dumps(agiang))
    
if __name__ == '__main__':
    # Script entry point: run the conversion, report completion, then exit.
    # sys is needed only here, so it is imported locally.
    import sys

    process()
    print('> Finished')
    # os._exit() ends the process without flushing stdio buffers, so the
    # final message must be flushed explicitly or it can be lost when stdout
    # is redirected to a file or pipe.
    sys.stdout.flush()
    # Exit status 0: reaching this line means process() completed without
    # raising, so the run must not be reported as a failure.
    os._exit(0)
